##################################################################################
##################################################################################
###                         Replication Script for                             ###
### Interest Groups' Recruitment of Incumbent Parliamentarians to Their Boards ###
###                 Article Published in Parliamentary Affairs                 ###
##################################################################################
##################################################################################

# Author: Oliver Huwyler


# Change the language and date formatting to English if it is not already
Sys.setenv(LANG = "EN")
Sys.setlocale("LC_TIME", "English") # key, without this conversion to POSIXct does not work
Sys.getlocale(category = "LC_ALL")


# Set the working directory 
setwd("/scicore/home/bailer/huwylero/Paper1/Analysis")


############
# Packages #
############

# Install libraries if necessary
if("stringr" %in% rownames(installed.packages()) == FALSE) {install.packages("stringr")}
if("lme4" %in% rownames(installed.packages()) == FALSE) {install.packages("lme4")}
if("Zelig" %in% rownames(installed.packages()) == FALSE) {install.packages("Zelig")}
if("car" %in% rownames(installed.packages()) == FALSE) {install.packages("car")}
if("logistf" %in% rownames(installed.packages()) == FALSE) {install.packages("logistf")}
if("dplyr" %in% rownames(installed.packages()) == FALSE) {install.packages("dplyr")}
if("data.table" %in% rownames(installed.packages()) == FALSE) {install.packages("data.table")}
if("ggplot2" %in% rownames(installed.packages()) == FALSE) {install.packages("ggplot2")}
if("sqldf" %in% rownames(installed.packages()) == FALSE) {install.packages("sqldf")}
if("gtools" %in% rownames(installed.packages()) == FALSE) {install.packages("gtools")}
if("broom" %in% rownames(installed.packages()) == FALSE) {install.packages("broom")}
if("tidyverse" %in% rownames(installed.packages()) == FALSE) {install.packages("tidyverse")}
if("stargazer" %in% rownames(installed.packages()) == FALSE) {install.packages("stargazer")}
if("MuMIn" %in% rownames(installed.packages()) == FALSE) {install.packages("MuMIn")}



# Load libraries
library(stringr)
library(lme4)
library(Zelig)
library(car) # for vif
library(logistf) # Firth Logistic Regression
library(dplyr)
library(data.table)
library(ggplot2)
library(sqldf)
library(gtools)
library(broom)
library(tidyverse)
library(stargazer)
library(MuMIn)


#######################
### Custom Function ###
#######################

## Summarizes data.
## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summariezed
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  library(plyr)
  
  # New version of length which can handle NA's: if na.rm==T, don't count them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }
  
  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop=.drop,
                 .fun = function(xx, col) {
                   c(N    = length2(xx[[col]], na.rm=na.rm),
                     mean = mean   (xx[[col]], na.rm=na.rm),
                     sd   = sd     (xx[[col]], na.rm=na.rm)
                   )
                 },
                 measurevar
  )
  
  # Rename the "mean" column    
  datac <- rename(datac, c("mean" = measurevar))
  
  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
  
  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval: 
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult
  
  return(datac)
}


##################
##################
### MANUSCRIPT ###
##################
##################


###################
### Data Import ###
###################

# Load the full analysable data frame
IGRECRUIT <- readRDS("./Data/MP-IG_Recruitment.rds")


# Note that calculations have been carried out on Scicore, the high-performance computing infrastructure of the University of Basel
# The models in this script require a lot of memory.

# The main models, e.g. the object modelavg cannot be provided due to the file size limit set by Harvard Dataverse.

# For the appendix, they can be provided. Note that calculating these models on your personal computer may take a long time
# or not be possible due to insufficient memory.

# For the appendix
z.outFull <- readRDS(file = "./Data/z.outFull.rds")
z.outGE <- readRDS(file = "./Data/z.outGE.rds")
z.outSE <- readRDS(file = "./Data/z.outSE.rds")
z.outGA <- readRDS(file = "./Data/z.outGA.rds")
z.outSA <- readRDS(file = "./Data/z.outSA.rds")

FirthFull <- readRDS(file = "./Data/FirthFull.rds")
FirthGE <- readRDS(file = "./Data/FirthGE.rds")
FirthSE <- readRDS(file = "./Data/FirthSE.rds")
FirthGA <- readRDS(file = "./Data/FirthGA.rds")
FirthSA <- readRDS(file = "./Data/FirthSA.rds")



######################
### Figure 1, left ###
######################


# 3) For a second graph: What IGs recruit in what year of the term (year 1 to 4)?
IGRECRUIT$termyear <- NA
IGRECRUIT$termyear[IGRECRUIT$year==1985] <- 2
IGRECRUIT$termyear[IGRECRUIT$year==1986] <- 3
IGRECRUIT$termyear[IGRECRUIT$year==1987] <- 4
IGRECRUIT$termyear[IGRECRUIT$year==1988] <- 1
IGRECRUIT$termyear[IGRECRUIT$year==1989] <- 2
IGRECRUIT$termyear[IGRECRUIT$year==1990] <- 3
IGRECRUIT$termyear[IGRECRUIT$year==1991] <- 4
IGRECRUIT$termyear[IGRECRUIT$year==1992] <- 1
IGRECRUIT$termyear[IGRECRUIT$year==1993] <- 2
IGRECRUIT$termyear[IGRECRUIT$year==1994] <- 3
IGRECRUIT$termyear[IGRECRUIT$year==1995] <- 4
IGRECRUIT$termyear[IGRECRUIT$year==1996] <- 1
IGRECRUIT$termyear[IGRECRUIT$year==1997] <- 2
IGRECRUIT$termyear[IGRECRUIT$year==1998] <- 3
IGRECRUIT$termyear[IGRECRUIT$year==1999] <- 4
IGRECRUIT$termyear[IGRECRUIT$year==2000] <- 1
IGRECRUIT$termyear[IGRECRUIT$year==2001] <- 2
IGRECRUIT$termyear[IGRECRUIT$year==2002] <- 3
IGRECRUIT$termyear[IGRECRUIT$year==2003] <- 4
IGRECRUIT$termyear[IGRECRUIT$year==2004] <- 1
IGRECRUIT$termyear[IGRECRUIT$year==2005] <- 2
IGRECRUIT$termyear[IGRECRUIT$year==2006] <- 3
IGRECRUIT$termyear[IGRECRUIT$year==2007] <- 4
IGRECRUIT$termyear[IGRECRUIT$year==2008] <- 1
IGRECRUIT$termyear[IGRECRUIT$year==2009] <- 2
IGRECRUIT$termyear[IGRECRUIT$year==2010] <- 3
IGRECRUIT$termyear[IGRECRUIT$year==2011] <- 4
IGRECRUIT$termyear[IGRECRUIT$year==2012] <- 1
IGRECRUIT$termyear[IGRECRUIT$year==2013] <- 2
IGRECRUIT$termyear[IGRECRUIT$year==2014] <- 3
IGRECRUIT$termyear[IGRECRUIT$year==2015] <- 4
IGRECRUIT$termyear[IGRECRUIT$year==2016] <- 1


# 4) Obtain the sums of what IG type recruits how many MPs when (term year 1-4)
# Important: Exlude the term that contains the year 2001 because a lot of back reporting was done there (not really recruting taking place)
RECRUITOVERTERM <- as.data.frame(table(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$year != 2001 & IGRECRUIT$year != 2000
                                                       & IGRECRUIT$year != 2002 & IGRECRUIT$year != 2003),]$year,
                                       IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$year != 2001 & IGRECRUIT$year != 2000
                                                       & IGRECRUIT$year != 2002 & IGRECRUIT$year != 2003),]$ig_type))
names(RECRUITOVERTERM) <- c("year", "ig_type", "Freq")

# 5) Add the term years to calculate averages
RECRUITOVERTERM$termyear <- NA
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1985] <- 2
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1986] <- 3
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1987] <- 4
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1988] <- 1
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1989] <- 2
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1990] <- 3
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1991] <- 4
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1992] <- 1
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1993] <- 2
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1994] <- 3
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1995] <- 4
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1996] <- 1
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1997] <- 2
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1998] <- 3
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==1999] <- 4
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2000] <- 1
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2001] <- 2
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2002] <- 3
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2003] <- 4
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2004] <- 1
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2005] <- 2
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2006] <- 3
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2007] <- 4
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2008] <- 1
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2009] <- 2
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2010] <- 3
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2011] <- 4
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2012] <- 1
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2013] <- 2
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2014] <- 3
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2015] <- 4
RECRUITOVERTERM$termyear[RECRUITOVERTERM$year==2016] <- 1


RECRUITOVERTERM$ig_type <- as.character(RECRUITOVERTERM$ig_type)

RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="11"] <- "Union"
RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="21"] <- "Business / trade association"
RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="26"] <- "Private firm"
RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="27"] <- "Public firm"
RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="30"] <- "Institutional organisation"
RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="40"] <- "Occupational organisation"
RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="50"] <- "Identity group"
RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="60"] <- "Hobby / leisure group"
RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="70"] <- "Identity group"               # Important: Recode religious group to identity group because there are so few cases
RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="80"] <- "Public interest group"
RECRUITOVERTERM$ig_type[RECRUITOVERTERM$ig_type=="SI"] <- "Semi-independent agency"

# Order of the factor
RECRUITOVERTERM$ig_type <- factor((RECRUITOVERTERM$ig_type),
                             levels = rev(c("Business / trade association", "Private firm", "Public firm","Union",
                                        "Occupational organisation", "Public interest group", "Identity group",
                                        "Institutional organisation","Hobby / leisure group", "Religious group",
                                        "Semi-independent agency")))


# 6) Calculate data for the sums of ties taken up in an an average term (excluding the 1999 parliament)
RECRUITOVERTERMAVG <- summarySE(RECRUITOVERTERM, measurevar="Freq", groupvars=c("termyear","ig_type"))
#aggregate(RECRUITOVERTERM$Freq, list(RECRUITOVERTERM$termyear, RECRUITOVERTERM$ig_type), mean)
RECRUITOVERTERMAVG$termyear <- as.character(RECRUITOVERTERMAVG$termyear)


# 7) Plot the tie formation in an average term

recruitinterm <-  ggplot() +
                  geom_bar(aes(y = Freq, x = termyear, fill = ig_type), data = RECRUITOVERTERMAVG,
                                    stat="identity") +
                  theme(panel.background = element_rect(fill = 'white', colour = 'white'),
                      panel.grid.major = element_line(size = 0.25, linetype = 'solid',
                                                      colour = "grey"),
                      panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
                                                      colour = "grey"),
                      legend.position="bottom", legend.direction="horizontal",
                      legend.title = element_blank()) +
                      scale_x_discrete(name = "", labels = c("1" = "Year 1","2" = "Year 2", "3" = "Year 3", "4" = "Year 4"), expand = c(0, 0) )+
                      scale_y_continuous(name ="Average number of board recruitments", expand = c(0, 0)) +
                      scale_fill_manual(values = c("Semi-independent agency" = "#aeb6bf",
                                               "Business / trade association" = "#800020",
                                               "Public firm" = "hotpink1",
                                               "Hobby / leisure group" =  "#ff8f17",
                                               "Religious group" = "#f5b041",
                                               "Public interest group" = "#58d68d",
                                               "Institutional organisation" = "#f4d03f",
                                               "Identity group" = "#00a126",
                                               "Occupational organisation" = "#5dade2",
                                               "Union" ="#3675c2",
                                               "Private firm" = "#ec7063"))


recruitinterm

completefilename <- paste("Recruitment_During_Term_", as.character(format(Sys.time(), "%Y-%m-%d_%H%M")), ".tif", sep = "")

tiff(completefilename,height = 17, width = 18, units = "cm", compression = "lzw", res = 1200)
recruitinterm
dev.off() 





#######################
### Figure 1, right ###
#######################

# Get data to plot how recruitment by IGs develops over tenure
# Obtain the sums of what IG type recruits how many MPs when (term year 1-4)
# Important: Exlude the term that contains the year 2001 because a lot of back reporting was done there (not really recruting taking place)
RECRUITOVERTENURE <- as.data.frame(table(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$year != 2001),]$tenure,
                                       IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$year != 2001),]$ig_type))
names(RECRUITOVERTENURE) <- c("tenure", "ig_type", "Freq")

RECRUITOVERTENURE$ig_type <- as.character(RECRUITOVERTENURE$ig_type)

RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="11"] <- "Union"
RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="21"] <- "Business / trade association"
RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="26"] <- "Private firm"
RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="27"] <- "Public firm"
RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="30"] <- "Institutional organisation"
RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="40"] <- "Occupational organisation"
RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="50"] <- "Identity group"
RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="60"] <- "Hobby / leisure group"
RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="70"] <- "Identity group"               # Important: Recode religious group to identity group because there are so few cases
RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="80"] <- "Public interest group"
RECRUITOVERTENURE$ig_type[RECRUITOVERTENURE$ig_type=="SI"] <- "Semi-independent agency"

sum(RECRUITOVERTENURE$Freq)

# For numbers in the paper: Percentages per IG type
IGTYPEFREQUENCIES <- aggregate(RECRUITOVERTENURE$Freq, by = list(RECRUITOVERTENURE$ig_type), sum)
names(IGTYPEFREQUENCIES) <- c("ig_type", "Freq")
IGTYPEFREQUENCIES$percentage <- IGTYPEFREQUENCIES$Freq / sum(IGTYPEFREQUENCIES$Freq)*100


# Calculate new frequencies with the merged categories
RECRUITOVERTENURE <- summarySE(RECRUITOVERTENURE, measurevar="Freq", groupvars=c("tenure","ig_type"))


# Order of the factor
RECRUITOVERTENURE$ig_type <- factor((RECRUITOVERTENURE$ig_type),
                                  levels = rev(c("Business / trade association", "Private firm", "Public firm","Union",
                                                 "Occupational organisation", "Public interest group", "Identity group",
                                                 "Institutional organisation","Hobby / leisure group", "Religious group",
                                                 "Semi-independent agency")))



# 9) Calculate the average number of recruitments per person and ig type

PERSIDTENURE <- unique(IGRECRUIT[,c('pers_id','tenure')])
PERSIDTENURECOUNT <- as.data.frame(table(PERSIDTENURE$tenure))
names(PERSIDTENURECOUNT) <- c("tenure","mpspertenureyear")


RECRUITOVERTENURE <- sqldf("SELECT RECRUITOVERTENURE.ig_type, RECRUITOVERTENURE.tenure, RECRUITOVERTENURE.Freq, PERSIDTENURECOUNT.mpspertenureyear
                            FROM RECRUITOVERTENURE LEFT JOIN PERSIDTENURECOUNT
                            ON
                            RECRUITOVERTENURE.tenure = PERSIDTENURECOUNT.tenure
                            ")

RECRUITOVERTENURE$recruitmentpercapita <- RECRUITOVERTENURE$Freq / RECRUITOVERTENURE$mpspertenureyear


RECRUITOVERTENURE$tenure <- as.numeric(RECRUITOVERTENURE$tenure)




recruittenure <- ggplot() +
#                 geom_bar(aes(y = recruitmentpercapita, x = tenure, fill = ig_type), data = RECRUITOVERTENURE,
#                 stat="identity") +
                  geom_area(data=RECRUITOVERTENURE, aes(x=tenure, y= recruitmentpercapita, fill=ig_type)) +
                  theme(panel.background = element_rect(fill = 'white', colour = 'white'),
                        panel.grid.major = element_line(size = 0.25, linetype = 'solid',
                                                        colour = "grey"),
                        panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
                                                        colour = "grey"),
                        legend.position="bottom", legend.direction="horizontal",
                        legend.title = element_blank()) +
                  scale_x_continuous(name = "Tenure in Years", limits=c(0,20), minor_breaks = seq(1, 20, 1), expand = c(0, 0))+ # expand = c(0, 0) gets rid of the white space between the axis and the content
                  scale_y_continuous(name ="Number of Dyads Formed Per MP", limits=c(0,1.4), breaks=seq(0,1.4,0.5), expand = c(0, 0)) +
                  scale_fill_manual(values = c("Semi-independent agency" = "#aeb6bf",
                                           "Business / trade association" = "#800020",
                                           "Public firm" = "hotpink1",
                                           "Hobby / leisure group" =  "#ff8f17",
                                           "Religious group" = "#f5b041",
                                           "Public interest group" = "#58d68d",
                                           "Institutional organisation" = "#f4d03f",
                                           "Identity group" = "#00a126",
                                           "Occupational organisation" = "#5dade2",
                                           "Union" ="#3675c2",
                                           "Private firm" = "#ec7063"))+
                  geom_label(data=RECRUITOVERTENURE, aes(x=tenure+0.3, y = 1.35, label = mpspertenureyear))



recruittenure

completefilename <- paste("Recruitment_over_Tenure", as.character(format(Sys.time(), "%Y-%m-%d_%H%M")), ".tif", sep = "")

tiff(completefilename,height = 17, width = 18, units = "cm", compression = "lzw", res = 1200)
recruittenure
dev.off() 






################
### Figure 2 ###
################

# 1) Run the test statistics

### I) General Capital
# a) National MP tenure
result <- t.test(as.numeric(IGRECRUIT$tenure),as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$tenure))

DICHOTEST <- data.frame()
DICHOTEST <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                              result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST) <- c("variable","t", "pvalue", "alternative","method")


DICHOTEST$meanpropformed <- mean(as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$tenure))
DICHOTEST$meanpropall <- mean(as.numeric(IGRECRUIT$tenure))

DICHOTEST$variable[1] <- "National MP tenure (years)"
DICHOTEST$type[1] <- "General Capital"



# b) Regional MP tenure
result <- t.test(as.numeric(IGRECRUIT$regionalmp_exp),as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$regionalmp_exp))

DICHOTEST2 <- data.frame()
DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                              result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")


DICHOTEST2$meanpropformed <- mean(as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$regionalmp_exp))
DICHOTEST2$meanpropall <- mean(as.numeric(IGRECRUIT$regionalmp_exp))

DICHOTEST2$variable[1] <- "Regional MP tenure (years)"
DICHOTEST2$type[1] <- "General Capital"


DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


# c) Party group president tenure
result <- t.test(as.numeric(IGRECRUIT$factpres_exp),as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$factpres_exp))

DICHOTEST2 <- data.frame()
DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")


DICHOTEST2$meanpropformed <- mean(as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$factpres_exp))
DICHOTEST2$meanpropall <- mean(as.numeric(IGRECRUIT$factpres_exp))

DICHOTEST2$variable[1] <- "Party group president tenure (years)"
DICHOTEST2$type[1] <- "General Capital"


DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


### II) Specific Capital
# a) Committee tenure in IG's policy area
result <- t.test(as.numeric(IGRECRUIT$codl_policyarea_exp_match),as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$codl_policyarea_exp_match))

DICHOTEST2 <- data.frame()
DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")


DICHOTEST2$meanpropformed <- mean(as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$codl_policyarea_exp_match))
DICHOTEST2$meanpropall <- mean(as.numeric(IGRECRUIT$codl_policyarea_exp_match))

DICHOTEST2$variable[1] <- "Committee tenure in IG's policy area (years)"
DICHOTEST2$type[1] <- "Specific Capital"


DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


# b) Committee president tenure in IG's policy area
result <- t.test(as.numeric(IGRECRUIT$codlpres_policyarea_exp_match),as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$codlpres_policyarea_exp_match))

DICHOTEST2 <- data.frame()
DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")


DICHOTEST2$meanpropformed <- mean(as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$codlpres_policyarea_exp_match))
DICHOTEST2$meanpropall <- mean(as.numeric(IGRECRUIT$codlpres_policyarea_exp_match))

DICHOTEST2$variable[1] <- "Committee president tenure in IG's policy area (years)"
DICHOTEST2$type[1] <- "Specific Capital"


DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


# c) Regional government tenure in IG's policy area
result <- t.test(as.numeric(IGRECRUIT$regionalgov_policyarea_exp_match),as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$regionalgov_policyarea_exp_match))

DICHOTEST2 <- data.frame()
DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")


DICHOTEST2$meanpropformed <- mean(as.numeric(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$regionalgov_policyarea_exp_match))
DICHOTEST2$meanpropall <- mean(as.numeric(IGRECRUIT$regionalgov_policyarea_exp_match))

DICHOTEST2$variable[1] <- "Regional government tenure in IG's policy area (years)"
DICHOTEST2$type[1] <- "Specific Capital"


DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


# d) Profession in IG's policy area (0/1)
# Two-Proportions Z-Test
# See: https://stats.stackexchange.com/questions/108226/r-power-prop-test-prop-test-and-unequal-sample-sizes-in-a-b-tests

result <- prop.test(c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$prof_policyarea_match == "1"),]$prof_policyarea_match), # Number of women in the sample
                      length(IGRECRUIT[which(IGRECRUIT$prof_policyarea_match == "1"),]$prof_policyarea_match)), # Number of women in the population
                    c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$prof_policyarea_match), # Total number of obs the sample
                      length(IGRECRUIT$prof_policyarea_match))) # Total number of observations

DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                            result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")
DICHOTEST2 <- DICHOTEST2[1,] # Keep only the first row

DICHOTEST2$meanpropformed <- (length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$prof_policyarea_match == "1"),]$prof_policyarea_match)/length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$prof_policyarea_match))*100
DICHOTEST2$meanpropall <- (length(IGRECRUIT[which(IGRECRUIT$prof_policyarea_match == "1"),]$prof_policyarea_match)/length(IGRECRUIT$prof_policyarea_match))*100

DICHOTEST2$variable[1] <- "Profession in IG's policy area (0/1)"
DICHOTEST2$type[1] <- "Specific Capital"

DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


# e) Existing IG ties in IG's policy area (0/1)
# Two-Proportions Z-Test
# See: https://stats.stackexchange.com/questions/108226/r-power-prop-test-prop-test-and-unequal-sample-sizes-in-a-b-tests

result <- prop.test(c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$ig_policyarea_match == "1"),]$ig_policyarea_match), # Number of women in the sample
                      length(IGRECRUIT[which(IGRECRUIT$ig_policyarea_match == "1"),]$ig_policyarea_match)), # Number of women in the population
                    c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$ig_policyarea_match), # Total number of obs the sample
                      length(IGRECRUIT$ig_policyarea_match))) # Total number of observations

DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")
DICHOTEST2 <- DICHOTEST2[1,] # Keep only the first row

DICHOTEST2$meanpropformed <- (length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$ig_policyarea_match == "1"),]$ig_policyarea_match)/length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$ig_policyarea_match))*100
DICHOTEST2$meanpropall <- (length(IGRECRUIT[which(IGRECRUIT$ig_policyarea_match == "1"),]$ig_policyarea_match)/length(IGRECRUIT$ig_policyarea_match))*100

DICHOTEST2$variable[1] <- "Existing IG ties in IG's policy area (0/1)"
DICHOTEST2$type[1] <- "Specific Capital"

DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


### III) General access
# a) Regional MP (0/1)
result <- prop.test(c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$regionalmp == "1"),]$regionalmp), # Number of women in the sample
                      length(IGRECRUIT[which(IGRECRUIT$regionalmp == "1"),]$regionalmp)), # Number of women in the population
                    c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$regionalmp), # Total number of obs the sample
                      length(IGRECRUIT$regionalmp))) # Total number of observations

DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")
DICHOTEST2 <- DICHOTEST2[1,] # Keep only the first row

DICHOTEST2$meanpropformed <- (length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$regionalmp == "1"),]$regionalmp)/length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$regionalmp))*100
DICHOTEST2$meanpropall <- (length(IGRECRUIT[which(IGRECRUIT$regionalmp == "1"),]$regionalmp)/length(IGRECRUIT$regionalmp))*100

DICHOTEST2$variable[1] <- "Regional MP (0/1)"
DICHOTEST2$type[1] <- "General Access"

DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


# b) Regional government member (0/1)
result <- prop.test(c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$regionalgov == "1"),]$regionalgov), # Number of women in the sample
                      length(IGRECRUIT[which(IGRECRUIT$regionalgov == "1"),]$regionalgov)), # Number of women in the population
                    c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$regionalgov), # Total number of obs the sample
                      length(IGRECRUIT$regionalgov))) # Total number of observations

DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")
DICHOTEST2 <- DICHOTEST2[1,] # Keep only the first row

DICHOTEST2$meanpropformed <- (length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$regionalgov == "1"),]$regionalgov)/length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$regionalgov))*100
DICHOTEST2$meanpropall <- (length(IGRECRUIT[which(IGRECRUIT$regionalgov == "1"),]$regionalgov)/length(IGRECRUIT$regionalgov))*100

DICHOTEST2$variable[1] <- "Regional government member (0/1)"
DICHOTEST2$type[1] <- "General Access"

DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


# c) Party group president (0/1)
result <- prop.test(c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$factpres == "1"),]$factpres), # Number of women in the sample
                      length(IGRECRUIT[which(IGRECRUIT$factpres == "1"),]$factpres)), # Number of women in the population
                    c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$factpres), # Total number of obs the sample
                      length(IGRECRUIT$factpres))) # Total number of observations

DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")
DICHOTEST2 <- DICHOTEST2[1,] # Keep only the first row

DICHOTEST2$meanpropformed <- (length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$factpres == "1"),]$factpres)/length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$factpres))*100
DICHOTEST2$meanpropall <- (length(IGRECRUIT[which(IGRECRUIT$factpres == "1"),]$factpres)/length(IGRECRUIT$factpres))*100

DICHOTEST2$variable[1] <- "Party group president (0/1)"
DICHOTEST2$type[1] <- "General Access"

DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


### IV) Specific access
# a) Committee in IG's policy area (0/1)
result <- prop.test(c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$codl_policyarea_match == "1"),]$codl_policyarea_match), # Number of women in the sample
                      length(IGRECRUIT[which(IGRECRUIT$codl_policyarea_match == "1"),]$codl_policyarea_match)), # Number of women in the population
                    c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$codl_policyarea_match), # Total number of obs the sample
                      length(IGRECRUIT$codl_policyarea_match))) # Total number of observations

DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")
DICHOTEST2 <- DICHOTEST2[1,] # Keep only the first row

DICHOTEST2$meanpropformed <- (length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$codl_policyarea_match == "1"),]$codl_policyarea_match)/length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$codl_policyarea_match))*100
DICHOTEST2$meanpropall <- (length(IGRECRUIT[which(IGRECRUIT$codl_policyarea_match == "1"),]$codl_policyarea_match)/length(IGRECRUIT$codl_policyarea_match))*100

DICHOTEST2$variable[1] <- "Committee in IG's policy area (0/1)"
DICHOTEST2$type[1] <- "Specific Access"

DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


# b) Committee president in IG's policy area (0/1)
result <- prop.test(c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$codlpres_policyarea_match == "1"),]$codlpres_policyarea_match), # Number of women in the sample
                      length(IGRECRUIT[which(IGRECRUIT$codlpres_policyarea_match == "1"),]$codlpres_policyarea_match)), # Number of women in the population
                    c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$codlpres_policyarea_match), # Total number of obs the sample
                      length(IGRECRUIT$codlpres_policyarea_match))) # Total number of observations

DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")
DICHOTEST2 <- DICHOTEST2[1,] # Keep only the first row

DICHOTEST2$meanpropformed <- (length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$codlpres_policyarea_match == "1"),]$codlpres_policyarea_match)/length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$codlpres_policyarea_match))*100
DICHOTEST2$meanpropall <- (length(IGRECRUIT[which(IGRECRUIT$codlpres_policyarea_match == "1"),]$codlpres_policyarea_match)/length(IGRECRUIT$codlpres_policyarea_match))*100

DICHOTEST2$variable[1] <- "Committee president in IG's policy area (0/1)"
DICHOTEST2$type[1] <- "Specific Access"

DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)


# c) Regional government member in IG's policy area (0/1)
result <- prop.test(c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$regionalgov_policyarea_match == "1"),]$regionalgov_policyarea_match), # Number of women in the sample
                      length(IGRECRUIT[which(IGRECRUIT$regionalgov_policyarea_match == "1"),]$regionalgov_policyarea_match)), # Number of women in the population
                    c(length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$regionalgov_policyarea_match), # Total number of obs the sample
                      length(IGRECRUIT$regionalgov_policyarea_match))) # Total number of observations

DICHOTEST2 <- data.frame(cbind(result[['data.name']],result[['statistic']], result[['p.value']],
                               result[['alternative']], result[['method']]), stringsAsFactors=FALSE)
colnames(DICHOTEST2) <- c("variable","t", "pvalue", "alternative","method")
DICHOTEST2 <- DICHOTEST2[1,] # Keep only the first row

DICHOTEST2$meanpropformed <- (length(IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$regionalgov_policyarea_match == "1"),]$regionalgov_policyarea_match)/length(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$regionalgov_policyarea_match))*100
DICHOTEST2$meanpropall <- (length(IGRECRUIT[which(IGRECRUIT$regionalgov_policyarea_match == "1"),]$regionalgov_policyarea_match)/length(IGRECRUIT$regionalgov_policyarea_match))*100

DICHOTEST2$variable[1] <- "Regional government member in IG's policy area (0/1)"
DICHOTEST2$type[1] <- "Specific Access"

DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2, result)



# 2) Store means in 1 instead of 2 columns
DICHOTEST2 <- DICHOTEST
DICHOTEST$Data <- "All possible MP-IG combinations"
DICHOTEST$meanpropformed <- NULL
colnames(DICHOTEST)[match("meanpropall",colnames(DICHOTEST))] <- "meanprop" # rename


DICHOTEST2$Data <- "Successful recruitment only"
DICHOTEST2$meanpropall <- NULL
colnames(DICHOTEST2)[match("meanpropformed",colnames(DICHOTEST2))] <- "meanprop" # rename

DICHOTEST <- rbind(DICHOTEST, DICHOTEST2)
rm(DICHOTEST2)

# 3) Generate significance starts in a separate table
# a) Generate a separate data frame
STARS <- subset(DICHOTEST, select = c("variable", "type", "pvalue"))
STARS <- sqldf("SELECT DISTINCT STARS.*
                       FROM STARS
                    ")
STARS$pvalue <- as.numeric(STARS$pvalue)

# b) Calculate the stars variable
STARS$stars <- ifelse(STARS$pvalue<0.001, STARS$stars <- "***", ifelse(STARS$pvalue<0.01, STARS$stars <- "**", ifelse(STARS$pvalue<0.05, STARS$stars <- "*", "")))


# 4 Order the variables' factor levels according to row order
DICHOTEST <- mutate(DICHOTEST, variable = factor(DICHOTEST$variable, rev(DICHOTEST$variable[1:14])))

# 5) Plot the first half of the figure for the continuous variables
# Important: To get a version with labels, comment out the following line in the figure code:
# scale_x_discrete(labels = "") + # Important: This is only hear because the different text lengths in Figure a and b lead to differently sized

Figure3a <- ggplot() +
  geom_bar(data=DICHOTEST[which(grepl("t-test",DICHOTEST$method)),], aes(x=variable, y=meanprop, fill=factor(Data)),stat="identity", position="dodge") +
  geom_rect(aes(xmin=0, xmax=3.5, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") + # background per capital / access type
  geom_rect(aes(xmin=3.5, xmax=8.5, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") + # background per capital / access type
  geom_rect(aes(xmin=8.5, xmax=11.5, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") + # background per capital / access type
  geom_rect(aes(xmin=11.5, xmax=14.5, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") + # background per capital / access type
  # geom_errorbar( aes(x=Service, ymin=mean-ci, ymax=mean+ci), position="dodge") +
  # geom_text(size=2, aes(x = Characteristic, y = PercentCharPres, label = Characteristic, group = Data), position=position_dodge(0.9),vjust=0.5, hjust = -0.2) +
  theme(panel.background = element_rect(fill = 'white', colour = 'white'),
        panel.grid.major.x = element_line(size = 0.25, linetype = 'solid',
                                          colour = "grey"),
        panel.grid.minor.x = element_line(size = 0.25, linetype = 'solid',
                                          colour = "grey"),
        legend.position="bottom", legend.direction="horizontal",
        legend.title = element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.y = element_blank()) +
  scale_fill_manual("legend", values = c("All possible MP-IG combinations" = "#F8766D",
                                         "Successful recruitment only" = "#00BFC4"),
                    labels = c("All possible MP-IG combinations" = "All possible MP-IG combinations  ",
                               "Successful recruitment only" = "Successful recruitment only  ")) +
  scale_y_continuous(position = "right", #labels = function(x) paste0(x, "%"), 
                     limits = c(0, 8), breaks=c(0, 1, 2, 3, 4, 5, 6, 7), expand = c(0, 0)) +
  scale_x_discrete(labels = "") + # Important: This is only hear because the different text lengths in Figure a and b lead to differently sized
  coord_flip() +
  ## If all cases has a larger mean / proportion value
  annotate("segment", x=5.65, xend=6.35,y=DICHOTEST$meanprop[1]+0.3, yend=DICHOTEST$meanprop[1]+0.3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=5.65, xend=5.65,y=DICHOTEST$meanprop[1]+0.1, yend=DICHOTEST$meanprop[1]+0.3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=6.35, xend=6.35,y=DICHOTEST$meanprop[15]+0.1, yend=DICHOTEST$meanprop[1]+0.3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 6, y = DICHOTEST$meanprop[1]+0.5, hjust = 0, label = paste0(STARS$stars[1]),
           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
#  annotate("segment", x=4.65, xend=5.35,y=DICHOTEST$meanprop[2]+0.3, yend=DICHOTEST$meanprop[2]+0.3, color = "black", size = 0.75) + # vertical black line
#  annotate("segment", x=4.65, xend=4.65,y=DICHOTEST$meanprop[2]+0.1, yend=DICHOTEST$meanprop[2]+0.3, color = "black", size = 0.75) + # lower black line
#  annotate("segment", x=5.35, xend=5.35,y=DICHOTEST$meanprop[16]+0.1, yend=DICHOTEST$meanprop[2]+0.3, color = "black", size = 0.75) + # upper black line
#  annotate(geom = "text", x = 5, y = DICHOTEST$meanprop[2]+0.5, hjust = 0, label = paste0(STARS$stars[2]),
#           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
  annotate("segment", x=3.65, xend=4.35,y=DICHOTEST$meanprop[3]+0.3, yend=DICHOTEST$meanprop[3]+0.3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=3.65, xend=3.65,y=DICHOTEST$meanprop[3]+0.1, yend=DICHOTEST$meanprop[3]+0.3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=4.35, xend=4.35,y=DICHOTEST$meanprop[17]+0.1, yend=DICHOTEST$meanprop[3]+0.3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 4, y = DICHOTEST$meanprop[3]+0.5, hjust = 0, label = paste0(STARS$stars[3]),
           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
  ## If succesful recruitment has a larger mean / proportion value
  annotate("segment", x=2.65, xend=3.35,y=DICHOTEST$meanprop[18]+0.3, yend=DICHOTEST$meanprop[18]+0.3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=2.65, xend=2.65,y=DICHOTEST$meanprop[4]+0.1, yend=DICHOTEST$meanprop[18]+0.3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=3.35, xend=3.35,y=DICHOTEST$meanprop[18]+0.1, yend=DICHOTEST$meanprop[18]+0.3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 3, y = DICHOTEST$meanprop[18]+0.5, hjust = 0, label = paste0(STARS$stars[4]),
           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
#  annotate("segment", x=1.65, xend=2.35,y=DICHOTEST$meanprop[19]+0.3, yend=DICHOTEST$meanprop[19]+0.3, color = "black", size = 0.75) + # vertical black line
#  annotate("segment", x=1.65, xend=1.65,y=DICHOTEST$meanprop[5]+0.1, yend=DICHOTEST$meanprop[19]+0.3, color = "black", size = 0.75) + # lower black line
#    annotate("segment", x=2.35, xend=2.35,y=DICHOTEST$meanprop[19]+0.1, yend=DICHOTEST$meanprop[19]+0.3, color = "black", size = 0.75) + # upper black line
#    annotate(geom = "text", x = 2, y = DICHOTEST$meanprop[19]+0.5, hjust = 0, label = paste0(STARS$stars[5]),
#           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
  annotate("segment", x=0.65, xend=1.35,y=DICHOTEST$meanprop[20]+0.3, yend=DICHOTEST$meanprop[20]+0.3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=0.65, xend=0.65,y=DICHOTEST$meanprop[6]+0.1, yend=DICHOTEST$meanprop[20]+0.3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=1.35, xend=1.35,y=DICHOTEST$meanprop[20]+0.1, yend=DICHOTEST$meanprop[20]+0.3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 1, y = DICHOTEST$meanprop[20]+0.5, hjust = 0, label = paste0(STARS$stars[6]),
           colour = "black", parse = FALSE)  # hjust = 0 for left allignment of text
  
  
Figure3a

# 6) Plot the second half of the figure for the dichotomous variables

Figure3b <- ggplot() +
  geom_bar(data=DICHOTEST[which(grepl("2-sample test for equality of proportions",DICHOTEST$method)),], aes(x=variable, y=meanprop, fill=factor(Data)),stat="identity", position="dodge") +
  geom_rect(aes(xmin=0, xmax=3.5, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") + # background per capital / access type
  geom_rect(aes(xmin=3.5, xmax=6.5, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") + # background per capital / access type
  geom_rect(aes(xmin=6.5, xmax=11.5, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") + # background per capital / access type
  geom_rect(aes(xmin=11.5, xmax=14.5, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") + # background per capital / access type
  # geom_errorbar( aes(x=Service, ymin=mean-ci, ymax=mean+ci), position="dodge") +
  # geom_text(size=2, aes(x = Characteristic, y = PercentCharPres, label = Characteristic, group = Data), position=position_dodge(0.9),vjust=0.5, hjust = -0.2) +
  theme(panel.background = element_rect(fill = 'white', colour = 'white'),
        panel.grid.major.x = element_line(size = 0.25, linetype = 'solid',
                                          colour = "grey"),
        panel.grid.minor.x = element_line(size = 0.25, linetype = 'solid',
                                          colour = "grey"),
        legend.position="bottom", legend.direction="horizontal",
        legend.title = element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.y = element_blank()) +
  scale_fill_manual("legend", values = c("All possible MP-IG combinations" = "#F8766D",
                                         "Successful recruitment only" = "#00BFC4"),
                    labels = c("All possible MP-IG combinations" = "All possible MP-IG combinations  ",
                               "Successful recruitment only" = "Successful recruitment only  ")) +
  scale_y_continuous(position = "left", labels = function(x) paste0(x, "%"), 
                     limits = c(0, 80), breaks=c(0, 10, 20, 30, 40, 50, 60, 70), expand = c(0, 0)) +
  scale_x_discrete(labels = "") + # Important: This is only hear because the different text lengths in Figure a and b lead to differently sized
  coord_flip() +
  ## If succesful recruitment has a larger mean / proportion value
  annotate("segment", x=7.65, xend=8.35,y=DICHOTEST$meanprop[21]+3, yend=DICHOTEST$meanprop[21]+3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=7.65, xend=7.65,y=DICHOTEST$meanprop[7]+1, yend=DICHOTEST$meanprop[21]+3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=8.35, xend=8.35,y=DICHOTEST$meanprop[21]+1, yend=DICHOTEST$meanprop[21]+3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 8, y = DICHOTEST$meanprop[21]+5, hjust = 0, label = paste0(STARS$stars[7]),
           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
  annotate("segment", x=6.65, xend=7.35,y=DICHOTEST$meanprop[22]+3, yend=DICHOTEST$meanprop[22]+3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=6.65, xend=6.65,y=DICHOTEST$meanprop[8]+1, yend=DICHOTEST$meanprop[22]+3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=7.35, xend=7.35,y=DICHOTEST$meanprop[22]+1, yend=DICHOTEST$meanprop[22]+3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 7, y = DICHOTEST$meanprop[22]+5, hjust = 0, label = paste0(STARS$stars[8]),
           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
#  annotate("segment", x=5.65, xend=6.35,y=DICHOTEST$meanprop[23]+3, yend=DICHOTEST$meanprop[23]+3, color = "black", size = 0.75) + # vertical black line
#  annotate("segment", x=5.65, xend=5.65,y=DICHOTEST$meanprop[9]+1, yend=DICHOTEST$meanprop[23]+3, color = "black", size = 0.75) + # lower black line
#  annotate("segment", x=6.35, xend=6.35,y=DICHOTEST$meanprop[23]+1, yend=DICHOTEST$meanprop[23]+3, color = "black", size = 0.75) + # upper black line
#  annotate(geom = "text", x = 6, y = DICHOTEST$meanprop[23]+5, hjust = 0, label = paste0(STARS$stars[9]),
#           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
  annotate("segment", x=4.65, xend=5.35,y=DICHOTEST$meanprop[24]+3, yend=DICHOTEST$meanprop[24]+3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=4.65, xend=4.65,y=DICHOTEST$meanprop[10]+1, yend=DICHOTEST$meanprop[24]+3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=5.35, xend=5.35,y=DICHOTEST$meanprop[24]+1, yend=DICHOTEST$meanprop[24]+3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 5, y = DICHOTEST$meanprop[24]+5, hjust = 0, label = paste0(STARS$stars[10]),
           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
  ## If all cases has a larger mean / proportion value
  annotate("segment", x=3.65, xend=4.35,y=DICHOTEST$meanprop[11]+3, yend=DICHOTEST$meanprop[11]+3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=3.65, xend=3.65,y=DICHOTEST$meanprop[11]+1, yend=DICHOTEST$meanprop[11]+3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=4.35, xend=4.35,y=DICHOTEST$meanprop[25]+1, yend=DICHOTEST$meanprop[11]+3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 4, y = DICHOTEST$meanprop[11]+5, hjust = 0, label = paste0(STARS$stars[11]),
           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
  ## If succesful recruitment has a larger mean / proportion value
  annotate("segment", x=2.65, xend=3.35,y=DICHOTEST$meanprop[26]+3, yend=DICHOTEST$meanprop[26]+3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=2.65, xend=2.65,y=DICHOTEST$meanprop[12]+1, yend=DICHOTEST$meanprop[26]+3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=3.35, xend=3.35,y=DICHOTEST$meanprop[26]+1, yend=DICHOTEST$meanprop[26]+3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 3, y = DICHOTEST$meanprop[26]+5, hjust = 0, label = paste0(STARS$stars[12]),
           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text
  annotate("segment", x=1.65, xend=2.35,y=DICHOTEST$meanprop[27]+3, yend=DICHOTEST$meanprop[27]+3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=1.65, xend=1.65,y=DICHOTEST$meanprop[13]+1, yend=DICHOTEST$meanprop[27]+3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=2.35, xend=2.35,y=DICHOTEST$meanprop[27]+1, yend=DICHOTEST$meanprop[27]+3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 2, y = DICHOTEST$meanprop[27]+5, hjust = 0, label = paste0(STARS$stars[13]),
           colour = "black", parse = FALSE) + # hjust = 0 for left allignment of text  
  annotate("segment", x=0.65, xend=1.35,y=DICHOTEST$meanprop[28]+3, yend=DICHOTEST$meanprop[28]+3, color = "black", size = 0.75) + # vertical black line
  annotate("segment", x=0.65, xend=0.65,y=DICHOTEST$meanprop[14]+1, yend=DICHOTEST$meanprop[28]+3, color = "black", size = 0.75) + # lower black line
  annotate("segment", x=1.35, xend=1.35,y=DICHOTEST$meanprop[28]+1, yend=DICHOTEST$meanprop[28]+3, color = "black", size = 0.75) + # upper black line
  annotate(geom = "text", x = 1, y = DICHOTEST$meanprop[28]+5, hjust = 0, label = paste0(STARS$stars[14]),
           colour = "black", parse = FALSE)  # hjust = 0 for left allignment of text  
  
 
Figure3b


# 7) Save both parts of the figure

completefilename <- paste("Article_Figure3_1stHalf_", as.character(format(Sys.time(), "%Y-%m-%d_%H%M")), ".tif", sep = "")

tiff(completefilename,height = 13.6, width = 11.785, units = "cm", compression = "lzw", res = 1200)
Figure3a
dev.off()


completefilename <- paste("Article_Figure3_2ndHalf_", as.character(format(Sys.time(), "%Y-%m-%d_%H%M")), ".tif", sep = "")

tiff(completefilename,height = 13.6, width = 11.785, units = "cm", compression = "lzw", res = 1200)
Figure3b
dev.off()





###############
### Table 2 ###
###############


# Get an overview in a table
VAR1 <- summarySE(IGRECRUIT, measurevar="tieformed")
VAR1$Variable <- colnames(VAR1)[3]
colnames(VAR1)[3] <- "mean"
VAR1$min <- min(IGRECRUIT$tieformed)
VAR1$max <- max(IGRECRUIT$tieformed)


VAR2 <- summarySE(IGRECRUIT, measurevar="tenure")
VAR2$Variable <- colnames(VAR2)[3]
colnames(VAR2)[3] <- "mean"
VAR2$min <- min(IGRECRUIT$tenure)
VAR2$max <- max(IGRECRUIT$tenure)


VAR3 <- summarySE(IGRECRUIT, measurevar="regionalmp_exp")
VAR3$Variable <- colnames(VAR3)[3]
colnames(VAR3)[3] <- "mean"
VAR3$min <- min(IGRECRUIT$regionalmp_exp)
VAR3$max <- max(IGRECRUIT$regionalmp_exp)


VAR4 <- summarySE(IGRECRUIT, measurevar="regionalgov_exp")
VAR4$Variable <- colnames(VAR4)[3]
colnames(VAR4)[3] <- "mean"
VAR4$min <- min(IGRECRUIT$regionalgov_exp)
VAR4$max <- max(IGRECRUIT$regionalgov_exp)


VAR5 <- summarySE(IGRECRUIT, measurevar="factpres_exp")
VAR5$Variable <- colnames(VAR5)[3]
colnames(VAR5)[3] <- "mean"
VAR5$min <- min(IGRECRUIT$factpres_exp)
VAR5$max <- max(IGRECRUIT$factpres_exp)


VAR6 <- summarySE(IGRECRUIT, measurevar="codl_policyarea_exp_match")
VAR6$Variable <- colnames(VAR6)[3]
colnames(VAR6)[3] <- "mean"
VAR6$min <- min(IGRECRUIT$codl_policyarea_exp_match)
VAR6$max <- max(IGRECRUIT$codl_policyarea_exp_match)


VAR7 <- summarySE(IGRECRUIT, measurevar="codlpres_policyarea_exp_match")
VAR7$Variable <- colnames(VAR7)[3]
colnames(VAR7)[3] <- "mean"
VAR7$min <- min(IGRECRUIT$codlpres_policyarea_exp_match)
VAR7$max <- max(IGRECRUIT$codlpres_policyarea_exp_match)


VAR8 <- summarySE(IGRECRUIT, measurevar="prof_policyarea_match")
VAR8$Variable <- colnames(VAR8)[3]
colnames(VAR8)[3] <- "mean"
VAR8$min <- min(IGRECRUIT$prof_policyarea_match)
VAR8$max <- max(IGRECRUIT$prof_policyarea_match)


VAR9 <- summarySE(IGRECRUIT, measurevar="regionalgov_policyarea_exp_match")
VAR9$Variable <- colnames(VAR9)[3]
colnames(VAR9)[3] <- "mean"
VAR9$min <- min(IGRECRUIT$regionalgov_policyarea_exp_match)
VAR9$max <- max(IGRECRUIT$regionalgov_policyarea_exp_match)


VAR10 <- summarySE(IGRECRUIT, measurevar="ig_policyarea_match")
VAR10$Variable <- colnames(VAR10)[3]
colnames(VAR10)[3] <- "mean"
VAR10$min <- min(IGRECRUIT$ig_policyarea_match)
VAR10$max <- max(IGRECRUIT$ig_policyarea_match)


VAR11 <- summarySE(IGRECRUIT, measurevar="regionalmp")
VAR11$Variable <- colnames(VAR11)[3]
colnames(VAR11)[3] <- "mean"
VAR11$min <- min(IGRECRUIT$regionalmp)
VAR11$max <- max(IGRECRUIT$regionalmp)


VAR12 <- summarySE(IGRECRUIT, measurevar="regionalgov")
VAR12$Variable <- colnames(VAR12)[3]
colnames(VAR12)[3] <- "mean"
VAR12$min <- min(IGRECRUIT$regionalgov)
VAR12$max <- max(IGRECRUIT$regionalgov)


VAR13 <- summarySE(IGRECRUIT, measurevar="factpres", na.rm = T)
VAR13$Variable <- colnames(VAR13)[3]
colnames(VAR13)[3] <- "mean"
VAR13$min <- min(IGRECRUIT$factpres, na.rm = T)
VAR13$max <- max(IGRECRUIT$factpres, na.rm = T)


VAR14 <- summarySE(IGRECRUIT, measurevar="codl_policyarea_match", na.rm = T)
VAR14$Variable <- colnames(VAR14)[3]
colnames(VAR14)[3] <- "mean"
VAR14$min <- min(IGRECRUIT$codl_policyarea_match, na.rm = T)
VAR14$max <- max(IGRECRUIT$codl_policyarea_match, na.rm = T)


VAR15 <- summarySE(IGRECRUIT, measurevar="codlpres_policyarea_match", na.rm = T)
VAR15$Variable <- colnames(VAR15)[3]
colnames(VAR15)[3] <- "mean"
VAR15$min <- min(IGRECRUIT$codlpres_policyarea_match, na.rm = T)
VAR15$max <- max(IGRECRUIT$codlpres_policyarea_match, na.rm = T)


VAR16 <- summarySE(IGRECRUIT, measurevar="regionalgov_policyarea_match", na.rm = T)
VAR16$Variable <- colnames(VAR16)[3]
colnames(VAR16)[3] <- "mean"
VAR16$min <- min(IGRECRUIT$regionalgov_policyarea_match, na.rm = T)
VAR16$max <- max(IGRECRUIT$regionalgov_policyarea_match, na.rm = T)


VAR17 <- summarySE(IGRECRUIT, measurevar="ideology_match", na.rm = T)
VAR17$Variable <- colnames(VAR17)[3]
colnames(VAR17)[3] <- "mean"
VAR17$min <- min(IGRECRUIT$ideology_match, na.rm = T)
VAR17$max <- max(IGRECRUIT$ideology_match, na.rm = T)


VAR18 <- summarySE(IGRECRUIT, measurevar="canton_match", na.rm = T)
VAR18$Variable <- colnames(VAR18)[3]
colnames(VAR18)[3] <- "mean"
VAR18$min <- min(IGRECRUIT$canton_match, na.rm = T)
VAR18$max <- max(IGRECRUIT$canton_match, na.rm = T)


VAR19 <- summarySE(IGRECRUIT, measurevar="totalties", na.rm = T)
VAR19$Variable <- colnames(VAR19)[3]
colnames(VAR19)[3] <- "mean"
VAR19$min <- min(IGRECRUIT$totalties, na.rm = T)
VAR19$max <- max(IGRECRUIT$totalties, na.rm = T)


VAR20 <- summarySE(IGRECRUIT, measurevar="seats_bothchambers", na.rm = T)
VAR20$Variable <- colnames(VAR20)[3]
colnames(VAR20)[3] <- "mean"
VAR20$min <- min(IGRECRUIT$seats_bothchambers, na.rm = T)
VAR20$max <- max(IGRECRUIT$seats_bothchambers, na.rm = T)


VAR21 <- summarySE(IGRECRUIT, measurevar="electionyear", na.rm = T)
VAR21$Variable <- colnames(VAR21)[3]
colnames(VAR21)[3] <- "mean"
VAR21$min <- min(IGRECRUIT$electionyear, na.rm = T)
VAR21$max <- max(IGRECRUIT$electionyear, na.rm = T)

IGRECRUIT$chamber2 <- ifelse(IGRECRUIT$chamber == "SR", 1, 0)
VAR22 <- summarySE(IGRECRUIT, measurevar="chamber2", na.rm = T)
VAR22$Variable <- colnames(VAR22)[3]
colnames(VAR22)[3] <- "mean"
VAR22$min <- min(IGRECRUIT$chamber2, na.rm = T)
VAR22$max <- max(IGRECRUIT$chamber2, na.rm = T)
IGRECRUIT$chamber2 <- NULL

VAR23 <- summarySE(IGRECRUIT, measurevar="age", na.rm = T)
VAR23$Variable <- colnames(VAR23)[3]
colnames(VAR23)[3] <- "mean"
VAR23$min <- min(IGRECRUIT$age, na.rm = T)
VAR23$max <- max(IGRECRUIT$age, na.rm = T)

IGRECRUIT$gender2 <- ifelse(IGRECRUIT$gender == "f", 1, 0)
VAR24 <- summarySE(IGRECRUIT, measurevar="gender2", na.rm = T)
VAR24$Variable <- colnames(VAR24)[3]
colnames(VAR24)[3] <- "mean"
VAR24$min <- min(IGRECRUIT$gender2, na.rm = T)
VAR24$max <- max(IGRECRUIT$gender2, na.rm = T)
IGRECRUIT$gender2 <- NULL

VAR25 <- summarySE(IGRECRUIT, measurevar="moderateness", na.rm = T)
VAR25$Variable <- colnames(VAR25)[3]
colnames(VAR25)[3] <- "mean"
VAR25$min <- min(IGRECRUIT$moderateness, na.rm = T)
VAR25$max <- max(IGRECRUIT$moderateness, na.rm = T)


# Create a single table
TABLE2 <- rbind(VAR1, VAR2, VAR3, VAR4, VAR5, VAR6, VAR7, VAR8, VAR9, VAR10,
                VAR11, VAR12,
                VAR13, VAR14, VAR15, VAR16, VAR17, VAR25, VAR20, VAR22, VAR21, VAR19, VAR18,
                VAR23, VAR24)
rm(VAR1, VAR2, VAR3, VAR4, VAR5, VAR6, VAR7, VAR8, VAR9, VAR10,VAR11, VAR12, VAR13, VAR14,
   VAR15, VAR16, VAR17, VAR18, VAR19, VAR20, VAR21,VAR22, VAR23, VAR24, VAR25)

TABLE2$.id <- NULL

# Reorder variables
TABLE2 <- TABLE1[,c(6,2,3,7,8,1,4,5)]
TABLE2$se <- NULL
TABLE2$ci <- NULL

stargazer(TABLE1, type= "html", digits=3, summary = FALSE,
          out="Table2.htm")





############### 
### Table 3 ###
###############

### Preparation: Random Sample of 0

# 1) Sample a dataframe that contains only 0s and has the exact same length as there are 1s in the dataframe (i.e. 6307)
# Run a test
set.seed(123)
sample_n(IGRECRUIT[which(IGRECRUIT$tieformed == 0),], length(which(IGRECRUIT$tieformed == 1)))

# 2) Now generate many random data frames in a list that only contain tieformed == 0 with a length of 6307
set.seed(123)
DFLIST <- lapply(1:1000, function(x) sample_n(IGRECRUIT[which(IGRECRUIT$tieformed == 0 & IGRECRUIT$year != 2001 & (is.na(IGRECRUIT$moderateness) != T) ),],
                                              length(which(IGRECRUIT$tieformed == 1 & IGRECRUIT$year != 2001 & (is.na(IGRECRUIT$moderateness) != T)))))

# 3) Append the new dataframes with the all the rows where IGRECRUIT$tieformed == 1
# Run a test
set.seed(123)
rbind(sample_n(IGRECRUIT[which(IGRECRUIT$tieformed == 0),], length(which(IGRECRUIT$tieformed == 1))), IGRECRUIT[which(IGRECRUIT$tieformed == 1),])

# 4) Now, append the rows for all new data frames
DFLIST <- lapply(DFLIST, function(x) {
  rbind(x, IGRECRUIT[which(IGRECRUIT$tieformed == 1 & IGRECRUIT$year != 2001 & (is.na(IGRECRUIT$moderateness) != T)),])
})


### Analysis


#### Carry out the model estimations
### 1) FULL MODEL
Sys.time()
estimates <- lapply(DFLIST, FUN = glmer, formula = tieformed ~ age + gender
                    + chamber
                    + electionyear
                    + tenure + regionalmp + regionalmp_exp + regionalgov #+ regionalgov_exp
                    + factpres + factpres_exp
                    + seats_bothchambers
                    + totalties
                    + codl_policyarea_match + codl_policyarea_exp_match
                    + codlpres_policyarea_match + codlpres_policyarea_exp_match
                    + prof_policyarea_match
                    + regionalgov_policyarea_match + regionalgov_policyarea_exp_match
                    + ig_policyarea_match +
                    + canton_match 
                    + ideology_match +
                    + moderateness + 
                      (1 | year) + (1 | org_id) + (1 | pers_id),
                    family= binomial, nAGQ = 0) # nAGQ = 0 drastically reduces the estimation time

Sys.time()

summary(estimates[[2]])

# Note: 10'000 dfs causes an error: Error in initializePtr() : Cholmod error 'out of memory' at file:../Core/cholmod_memory.c, line 147


# Read carefully: https://uoftcoders.github.io/rcourse/lec11-model-selection.html

# Get the models: They are weighted according to AICc
modelavg <- summary(model.avg(estimates))

# Assign equal weights to all models:
Weights(modelavg) <- rep(1, length(estimates)) # assigned weights are rescaled to sum to 1 (i.e. every model weighs the same)


summary(modelavg)

# Save the Full Model
save(modelavg, file = "Full_Model.RData")

### 2) GENERAL EXPERIENCE

Sys.time()
estimatesGE <- lapply(DFLIST, FUN = glmer, formula = tieformed ~ tenure
                      + regionalmp_exp
                     # + regionalgov_exp
                      + factpres_exp
                      + (1 | year) + (1 | org_id) + (1 | pers_id),
                      family= binomial, nAGQ = 0) # nAGQ = 0 drastically reduces the estimation time

Sys.time()

summary(estimatesGE[[1]])

# Note: 10'000 dfs causes an error: Error in initializePtr() : Cholmod error 'out of memory' at file:../Core/cholmod_memory.c, line 147


# Read carefully: https://uoftcoders.github.io/rcourse/lec11-model-selection.html

# Get the models: They are weighted according to AICc
modelavgGE <- summary(model.avg(estimatesGE))

# Assign equal weights to all models:
Weights(modelavgGE) <- rep(1, length(estimatesGE)) # assigned weights are rescaled to sum to 1 (i.e. every model weighs the same)

summary(modelavgGE)


### 3) GENERAL ACCESS

Sys.time()
estimatesGA <- lapply(DFLIST, FUN = glmer, formula = tieformed ~ regionalmp
                      + regionalgov
                      + factpres
                      + (1 | year) + (1 | org_id) + (1 | pers_id),
                      family= binomial, nAGQ = 0) # nAGQ = 0 drastically reduces the estimation time

Sys.time()

summary(estimatesGA[[1]])

# Note: 10'000 dfs causes an error: Error in initializePtr() : Cholmod error 'out of memory' at file:../Core/cholmod_memory.c, line 147


# Read carefully: https://uoftcoders.github.io/rcourse/lec11-model-selection.html

# Get the models: They are weighted according to AICc
modelavgGA <- summary(model.avg(estimatesGA))

# Assign equal weights to all models:
Weights(modelavgGA) <- rep(1, length(estimatesGA)) # assigned weights are rescaled to sum to 1 (i.e. every model weighs the same)


summary(modelavgGA)


### 4) SPECIFIC EXPERIENCE

Sys.time()
estimatesSE <- lapply(DFLIST, FUN = glmer, formula = tieformed ~ codl_policyarea_exp_match
                      + codlpres_policyarea_exp_match
                      + prof_policyarea_match
                      + regionalgov_policyarea_exp_match
                      + ig_policyarea_match
                      + (1 | year) + (1 | org_id) + (1 | pers_id),
                      family= binomial, nAGQ = 0) # nAGQ = 0 drastically reduces the estimation time

Sys.time()

summary(estimatesSE[[1]])

# Note: 10'000 dfs causes an error: Error in initializePtr() : Cholmod error 'out of memory' at file:../Core/cholmod_memory.c, line 147


# Read carefully: https://uoftcoders.github.io/rcourse/lec11-model-selection.html

# Get the models: They are weighted according to AICc
modelavgSE <- summary(model.avg(estimatesSE))

# Assign equal weights to all models:
Weights(modelavgSE) <- rep(1, length(estimatesSE)) # assigned weights are rescaled to sum to 1 (i.e. every model weighs the same)


summary(modelavgSE)




### 5) SPECIFIC ACCESS

Sys.time()
estimatesSA <- lapply(DFLIST, FUN = glmer, formula = tieformed ~ codl_policyarea_match
                      + codlpres_policyarea_match
                      + regionalgov_policyarea_match
                      + (1 | year) + (1 | org_id) + (1 | pers_id),
                      family= binomial, nAGQ = 0) # nAGQ = 0 drastically reduces the estimation time

Sys.time()

summary(estimatesSA[[1]])

# Note: 10'000 dfs causes an error: Error in initializePtr() : Cholmod error 'out of memory' at file:../Core/cholmod_memory.c, line 147


# Read carefully: https://uoftcoders.github.io/rcourse/lec11-model-selection.html

# Get the models: They are weighted according to AICc
modelavgSA <- summary(model.avg(estimatesSA))

# Assign equal weights to all models:
Weights(modelavgSA) <- rep(1, length(estimatesSA)) # assigned weights are rescaled to sum to 1 (i.e. every model weighs the same)


summary(modelavgSA)


############################
## Saving Averaged Models ##
############################


# 1) Save all required data frames to a list
avmodlist <- c("modelavg","modelavgGE","modelavgGA","modelavgSE","modelavgSA")


# 2) Create the file name (including the path)
completefilename <- paste("Complete_Average_Models_", as.character(format(Sys.time(), "%Y-%m-%d_%H%M")), ".RData", sep = "")


# 3) Save the data frames
save(list=avmodlist, file = completefilename)




#######################
#######################
### ONLINE APPENDIX ###
#######################
#######################



##################
### Figure III ###
##################

# Generate Graph 1 with ig_type
# 9) Get data to plot how recruitment by IGs develops over years

# Important: Exlude the term that contains the year 2001 because a lot of back reporting was done there (not really recruting taking place)
RECRUITOVERYEARS <- as.data.frame(table(IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$year,
                                        IGRECRUIT[which(IGRECRUIT$tieformed == 1),]$ig_type))
names(RECRUITOVERYEARS) <- c("year", "ig_type", "Freq")

RECRUITOVERYEARS$ig_type <- as.character(RECRUITOVERYEARS$ig_type)

RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="11"] <- "Union"
RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="21"] <- "Business / trade association"
RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="26"] <- "Private firm"
RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="27"] <- "Public firm"
RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="30"] <- "Institutional organisation"
RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="40"] <- "Occupational organisation"
RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="50"] <- "Identity group"
RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="60"] <- "Hobby / leisure group"
RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="70"] <- "Religious group"
RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="80"] <- "Public interest group"
RECRUITOVERYEARS$ig_type[RECRUITOVERYEARS$ig_type=="SI"] <- "Semi-independent agency"

sum(RECRUITOVERYEARS$Freq)

# Order of the factor
RECRUITOVERYEARS$ig_type <- factor((RECRUITOVERYEARS$ig_type),
                                   levels = c("Business / trade association", "Private firm", "Public firm","Union",
                                              "Occupational organisation", "Identity group","Institutional organisation",
                                              "Hobby / leisure group", "Religious group", "Public interest group",
                                              "Semi-independent agency"))


# Plot the development of recruitment over years by ig_type
RECRUITOVERYEARS$year <- as.character(RECRUITOVERYEARS$year)
RECRUITOVERYEARS$year <- as.numeric(RECRUITOVERYEARS$year)


recruityears <- ggplot() +
  geom_rect(aes(xmin=1985, xmax=1987, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") +
  geom_rect(aes(xmin=1987, xmax=1991, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") +
  geom_rect(aes(xmin=1991, xmax=1995, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") +
  geom_rect(aes(xmin=1995, xmax=1999, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") +
  geom_rect(aes(xmin=1999, xmax=2003, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") +
  geom_rect(aes(xmin=2003, xmax=2007, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") +
  geom_rect(aes(xmin=2007, xmax=2011, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") +
  geom_rect(aes(xmin=2011, xmax=2015, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") +
  geom_rect(aes(xmin=2015, xmax=2016, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") +
  geom_area(data=RECRUITOVERYEARS, aes(x=year, y= Freq, fill=ig_type)) +
  theme(panel.background = element_rect(fill = 'white', colour = 'white'),
        panel.grid.major = element_line(size = 0.25, linetype = 'solid',
                                        colour = "grey"),
        panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
                                        colour = "grey"),
        legend.position="bottom", legend.direction="horizontal",
        legend.title = element_blank()) +
  scale_x_continuous(name ="Year", breaks = seq(from = 1985, to = 2016, by = 5), expand = c(0, 0)) + 
  scale_y_continuous(name ="Number of MP-IG Dyads Formed", limits=c(0,549), expand = c(0, 0)) +
  scale_fill_manual(values = c("Semi-independent agency" = "#aeb6bf",
                               "Business / trade association" = "#800020",
                               "Public firm" = "hotpink1",
                               "Hobby / leisure group" =  "#dc7633",
                               "Religious group" = "#f5b041",
                               "Public interest group" = "#f4d03f",
                               "Institutional organisation" = "#58d68d",
                               "Identity group" = "#45b39d",
                               "Occupational organisation" = "#5dade2",
                               "Union" ="#a569bd",
                               "Private firm" = "#ec7063"))


recruityears

completefilename <- paste("Recruitment_over_Years_by_IG-Type", as.character(format(Sys.time(), "%Y-%m-%d_%H%M")), ".tif", sep = "")

tiff(completefilename,height = 15.6, width = 22.1, units = "cm", compression = "lzw", res = 1200)
recruityears
dev.off() 

### Additional numbers for the paper:
YEARLYRECRUITMENTSUMS <- aggregate(RECRUITOVERYEARS$Freq, by=list(year=RECRUITOVERYEARS$year), FUN=sum)
names(YEARLYRECRUITMENTSUMS) <- c("year", "totalannualrecruitments")
RECRUITOVERYEARS <- sqldf("SELECT RECRUITOVERYEARS.*, YEARLYRECRUITMENTSUMS.totalannualrecruitments
                          FROM YEARLYRECRUITMENTSUMS LEFT JOIN RECRUITOVERYEARS
                          ON
                          YEARLYRECRUITMENTSUMS.year = RECRUITOVERYEARS.year
                          ")
rm(YEARLYRECRUITMENTSUMS)
RECRUITOVERYEARS$share <- RECRUITOVERYEARS$Freq / RECRUITOVERYEARS$totalannualrecruitments

ggplot() + geom_line(aes(y = share, x = year, colour = ig_type),
                     data = RECRUITOVERYEARS, stat="identity", size = 1.5)

### Information on decades
RECRUITOVERYEARS$decade <- paste(as.numeric(gsub("[0-9]{1}$", "", RECRUITOVERYEARS$year)),"0", sep="")

summarySE(RECRUITOVERYEARS[which(RECRUITOVERYEARS$year != 2001),], measurevar="totalannualrecruitments", groupvars=c("decade"))


### Not for the paper, but for Stefanie's book chapter
recruityears <- ggplot() +
  geom_rect(aes(xmin=1985, xmax=1987, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") +
  geom_rect(aes(xmin=1987, xmax=1991, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") +
  geom_rect(aes(xmin=1991, xmax=1995, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") +
  geom_rect(aes(xmin=1995, xmax=1999, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") +
  geom_rect(aes(xmin=1999, xmax=2003, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") +
  geom_rect(aes(xmin=2003, xmax=2007, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") +
  geom_rect(aes(xmin=2007, xmax=2011, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") +
  geom_rect(aes(xmin=2011, xmax=2015, ymin= 0,ymax=Inf), alpha=0.2, fill="gray88") +
  geom_rect(aes(xmin=2015, xmax=2016, ymin= 0,ymax=Inf), alpha=0.2, fill="gray57") +
  geom_line(data=RECRUITOVERYEARS, aes(x=year, y= Freq, color=ig_type)) +
  theme(panel.background = element_rect(fill = 'white', colour = 'white'),
        panel.grid.major = element_line(size = 0.25, linetype = 'solid',
                                        colour = "grey"),
        panel.grid.minor = element_line(size = 0.25, linetype = 'solid',
                                        colour = "grey"),
        legend.position="bottom", legend.direction="horizontal",
        legend.title = element_blank()) +
  scale_x_continuous(name ="Year", breaks = seq(from = 1985, to = 2016, by = 5), expand = c(0, 0)) + 
  scale_y_continuous(name ="Number of MP-IG Dyads Formed", limits=c(0,280), expand = c(0, 0)) +
  scale_fill_manual(values = c("Semi-independent agency" = "#aeb6bf",
                               "Business / trade association" = "#800020",
                               "Public firm" = "hotpink1",
                               "Hobby / leisure group" =  "#dc7633",
                               "Religious group" = "#f5b041",
                               "Public interest group" = "#f4d03f",
                               "Institutional organisation" = "#58d68d",
                               "Identity group" = "#45b39d",
                               "Occupational organisation" = "#5dade2",
                               "Union" ="#a569bd",
                               "Private firm" = "#ec7063"))


recruityears

completefilename <- paste("Recruitment_over_Years_by_IG-Type", as.character(format(Sys.time(), "%Y-%m-%d_%H%M")), ".tif", sep = "")

tiff(completefilename,height = 15.6, width = 22.1, units = "cm", compression = "lzw", res = 1200)
recruityears
dev.off() 




################
### Table IV ###
################

# Multicollinearity test based on mixed effects logistic regression

LogMod <- glm(tieformed ~ age + gender
                + chamber
                + electionyear
                + tenure + regionalmp + regionalmp_exp + regionalgov #+ regionalgov_exp # removed due to multicollinearity on 25/05/2020
                + factpres + factpres_exp
                + seats_bothchambers
                + totalties
                + codl_policyarea_match + codl_policyarea_exp_match
                + codlpres_policyarea_match + codlpres_policyarea_exp_match
                + prof_policyarea_match
                + regionalgov_policyarea_match + regionalgov_policyarea_exp_match
                + ig_policyarea_match +
                + canton_match + ideology_match + moderateness, 
                family = binomial(link = "logit"), data = IGRECRUIT)

summary(LogMod)

# Check for multicollinearity in the model
# Note: Analyze the magnitude of multicollinearity by considering the size of the VIF.
# A rule of thumb is that if VIF of a variable > 10 then multicollinearity is high.
# A cutoff of 5 is also commonly used.
vif(LogMod)





###############
### Table V ###
###############

# Rare Events Logit 
# Adapted from: http://ftp.uni-bayreuth.de/math/statlib/R/CRAN/doc/vignettes/Zelig/relogit.pdf

### Specification 1: One Tau with Prior Correction and Bias Correction ###

# Specify tau in the model: tau is a a vector containing either one or two values for tau, the true population fraction of ones

# Count the number of unique occurences of 1 (tie formed) and 0 (no tie formed) for tau
table(IGRECRUIT$tieformed)


# 1) Full model
Sys.time()
z.outFull <- zelig(tieformed ~ age + gender
                    + chamber
                    + electionyear
                    + tenure + regionalmp + regionalmp_exp + regionalgov #+ regionalgov_exp
                    + factpres + factpres_exp
                    + seats_bothchambers
                    + totalties
                    + codl_policyarea_match + codl_policyarea_exp_match
                    + codlpres_policyarea_match + codlpres_policyarea_exp_match
                    + prof_policyarea_match
                    + regionalgov_policyarea_match + regionalgov_policyarea_exp_match
                    + ig_policyarea_match
                    + canton_match
                    + ideology_match
                    + moderateness
                    + pers_id
                    #+ org_id
                    + year, 
                    data = IGRECRUIT, model = "relogit", tau = 5783/1136361) # tau =  the proportion of formed ties (1) to non-formed ties (0)
Sys.time()


# Summarize the model output:
summary(z.outFull)

# Set the explanatory variables to their means:
x.outfull <- setx(z.outFull)

# Simulate quantities of interest:
s.outfull <- sim(z.outFull, x = x.outfull)
summary(s.outfull)
plot(s.outfull)




# 2) GENERAL EXPERIENCE
Sys.time()
z.outGE <- zelig(tieformed ~ tenure
                + regionalmp_exp
                # + regionalgov_exp
                + factpres_exp
                + pers_id
                #+ org_id
                + year, 
                data = IGRECRUIT, model = "relogit", tau = 5783/1136361) # tau =  the proportion of formed ties (1) to non-formed ties (0)
Sys.time()


# Summarize the model output:
summary(z.outGE)

# Set the explanatory variables to their means:
x.outGE <- setx(z.outGE)

# Simulate quantities of interest:
s.outGE  <- sim(z.outGE , x = x.outGE )
summary(s.outGE )
plot(s.outGE)



# 3) GENERAL ACCESS
Sys.time()
z.outGA <- zelig(tieformed ~ regionalmp
                + regionalgov
                + factpres
                + pers_id
                #+ org_id
                + year, 
                data = IGRECRUIT, model = "relogit", tau = 5783/1136361) # tau =  the proportion of formed ties (1) to non-formed ties (0)
Sys.time()


# Summarize the model output:
summary(z.outGA)

# Set the explanatory variables to their means:
x.outGA <- setx(z.outGA)

# Simulate quantities of interest:
s.outGA <- sim(z.outGA, x = x.outGA)
summary(s.outGA)
plot(s.outGA)



# 4) SPECIFIC EXPERIENCE
Sys.time()
z.outSE <- zelig(tieformed ~ codl_policyarea_exp_match
                + codlpres_policyarea_exp_match
                + prof_policyarea_match
                + regionalgov_policyarea_exp_match
                + ig_policyarea_match
                + pers_id
                #+ org_id
                + year, 
                data = IGRECRUIT, model = "relogit", tau = 5783/1136361) # tau =  the proportion of formed ties (1) to non-formed ties (0)
Sys.time()


# Summarize the model output:
summary(z.outSE)

# Set the explanatory variables to their means:
x.outSE <- setx(z.outSE)

# Simulate quantities of interest:
s.outSE <- sim(z.outSE, x = x.outSE)
summary(s.outSE)
plot(s.outSE)


# 5) SPECIFIC ACCESS
Sys.time()
z.outSA <- zelig(tieformed ~ codl_policyarea_match
                 + codlpres_policyarea_match
                 + regionalgov_policyarea_match
                 + pers_id
                 #+ org_id
                 + year, 
                 data = IGRECRUIT, model = "relogit", tau = 5783/1136361) # tau =  the proportion of formed ties (1) to non-formed ties (0)
Sys.time()


# Summarize the model output:
summary(z.outSA)

# Set the explanatory variables to their means:
x.outSA <- setx(z.outSA)

# Simulate quantities of interest:
s.outSA <- sim(z.outSA, x = x.outSA)
summary(s.outSA)
plot(s.outSA)





################
### Table VI ###
################

# Firth's Bias-Reduced Logistic Regression

# If you have a larger count of the rarer of the two events, say 1000 (even better if it's 2000)
# in a sample of 100'000 with the same low event rate (1% to 2%) you can use logistic regression.
# The estimation will have to be done using "penalized likelihood" method (also called Firth's penalized
# likelihood approach, after it's intentor)
# (cf. https://de.slideshare.net/tejamoy/logistic-regression-with-low-event-rate-rare-events)



# 1) Full model
Sys.time()
FirthFull <- logistf(tieformed ~ age + gender
                     + chamber
                     + electionyear
                     + tenure + regionalmp + regionalmp_exp + regionalgov #+ regionalgov_exp
                     + factpres + factpres_exp
                     + seats_bothchambers
                     + totalties
                     + codl_policyarea_match + codl_policyarea_exp_match
                     + codlpres_policyarea_match + codlpres_policyarea_exp_match
                     + prof_policyarea_match
                     + regionalgov_policyarea_match + regionalgov_policyarea_exp_match
                     + ig_policyarea_match
                     + canton_match
                     + ideology_match
                     + moderateness
                     + pers_id
                     #+ org_id
                     + year, 
                     data = IGRECRUIT, firth=TRUE, pl=TRUE)
Sys.time()


# Summarize the model output:
summary(FirthFull)




# 2) GENERAL EXPERIENCE
Sys.time()
FirthGE <- logistf(tieformed ~ tenure
                   + regionalmp_exp
                   # + regionalgov_exp
                   + factpres_exp
                   + pers_id
                   #+ org_id
                   + year, 
                   data = IGRECRUIT, firth=TRUE, pl=TRUE)Sys.time()


# Summarize the model output:
summary(FirthGE)



# 3) GENERAL ACCESS
Sys.time()
FirthGA <- logistf(tieformed ~ regionalmp
                   + regionalgov
                   + factpres
                   + pers_id
                   #+ org_id
                   + year, 
                   data = IGRECRUIT, firth=TRUE, pl=TRUE)
Sys.time()


# Summarize the model output:
summary(FirthGA)



# 4) SPECIFIC EXPERIENCE
Sys.time()
FirthSE <- logistf(tieformed ~ codl_policyarea_exp_match
                   + codlpres_policyarea_exp_match
                   + prof_policyarea_match
                   + regionalgov_policyarea_exp_match
                   + ig_policyarea_match
                   + pers_id
                   #+ org_id
                   + year, 
                   data = IGRECRUIT, firth=TRUE, pl=TRUE)
Sys.time()

# Summarize the model output:
summary(FirthSE)


# 5) SPECIFIC ACCESS
Sys.time()
FirthSA <- logistf(tieformed ~ codl_policyarea_match
                   + codlpres_policyarea_match
                   + regionalgov_policyarea_match
                   + pers_id
                   #+ org_id
                   + year, 
                   data = IGRECRUIT, firth=TRUE, pl=TRUE)
Sys.time()


# Summarize the model output:
summary(FirthSA)

        



########################
### Save environment ###
########################

# Create the file name automatically and save it
completefilename <- paste("IG-Recruitment_Analysis", as.character(format(Sys.time(), "%Y-%m-%d_%H%M")), ".RData", sep = "")

save.image(file = completefilename)